{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## algorithm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class HashTable:\n",
    "\n",
    "    ratio_expand = .7\n",
    "    ratio_shrink = .2\n",
    "    min_size = 11\n",
    "    empty = (None,)\n",
    "\n",
    "    def __init__(self, size=None):\n",
    "        self._size = size or self.min_size\n",
    "        self._buckets = [None] * self._size\n",
    "        self._count = 0\n",
    "\n",
    "    def _entry(self, key):\n",
    "        # get hash\n",
    "        hash_ = hash(key)\n",
    "        idx1 = None\n",
    "\n",
    "        for i in range(self._size):\n",
    "            # quadratic probing\n",
    "            idx = (hash_ + i) % self._size\n",
    "            entry = self._buckets[idx]\n",
    "\n",
    "            # end of chain\n",
    "            if not entry:\n",
    "                break\n",
    "            # remember first empty bucket\n",
    "            elif entry is self.empty:\n",
    "                if idx1 is None:\n",
    "                    idx1 = idx\n",
    "            # test key\n",
    "            elif entry[0] == key:\n",
    "                return idx, entry\n",
    "\n",
    "        else:\n",
    "            # out of space\n",
    "            if idx1 is None:\n",
    "                raise IndexError()\n",
    "\n",
    "        # return first empty bucket\n",
    "        return (idx, None) if idx1 is None else (idx1, None)\n",
    "\n",
    "    def _ensure_capacity(self):\n",
    "        fill = self._count / self._size\n",
    "        \n",
    "        # expand or shrink?\n",
    "        if fill > self.ratio_expand:\n",
    "            self._size = self._size * 2 + 1\n",
    "        elif fill < self.ratio_shrink and self._size > self.min_size:\n",
    "            self._size = (self._size - 1) // 2\n",
    "        else:\n",
    "            return\n",
    "\n",
    "        # reallocate buckets\n",
    "        entries = self._buckets\n",
    "        self._buckets = [None] * self._size\n",
    "\n",
    "        # store entries into new buckets\n",
    "        for entry in entries:\n",
    "            if entry and entry is not self.empty:\n",
    "                idx, _ = self._entry(entry[0])\n",
    "                self._buckets[idx] = entry\n",
    "\n",
    "    def __len__(self):\n",
    "        return self._count\n",
    "\n",
    "    def __contains__(self, key):\n",
    "        _, entry = self._entry(key)\n",
    "        return bool(entry)\n",
    "\n",
    "    def __getitem__(self, key):\n",
    "        _, entry = self._entry(key)\n",
    "        return entry and entry[1]\n",
    "\n",
    "    def __setitem__(self, key, value):\n",
    "        idx, entry = self._entry(key)\n",
    "\n",
    "        # set value\n",
    "        self._buckets[idx] = key, value\n",
    "\n",
    "        # expand\n",
    "        self._count += bool(not entry or entry is self.empty)\n",
    "        self._ensure_capacity()\n",
    "\n",
    "    def __delitem__(self, key):\n",
    "        idx, entry = self._entry(key)\n",
    "\n",
    "        # delete key and value\n",
    "        if entry:\n",
    "            self._buckets[idx] = self.empty\n",
    "\n",
    "        # shrink\n",
    "        self._count -= bool(entry and entry is not self.empty)\n",
    "        self._ensure_capacity()\n",
    "\n",
    "    def __iter__(self):\n",
    "        for entry in self._buckets:\n",
    "            if entry and entry is not self.empty:\n",
    "                yield entry[0]\n",
    "\n",
    "    def slots(self):\n",
    "        return ''.join('-' if not p else 'o' if p is self.empty else 'x' for p in self._buckets)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## run"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "table = HashTable()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# add random values\n",
    "for _ in range(1000):\n",
    "    key, value = np.random.randint(1000), np.random.rand()\n",
    "    if np.random.rand() >= .5:\n",
    "        table[key] = value\n",
    "    else:\n",
    "        del table[key]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(303, 767)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(table), table._size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'-xxxxxxxx----xxxx--xxxx--xxxxxxxxx--x-xx------x-xx-xx---xxxxxxxx--------xxx-----x-x-x---x--xxxxxxx--xxxxo-x-x----xx-xxx-xo-xxx----xx---xxxx--xx-x-----xxxxxxx-xxxx-xxx-x---x-x----o---xx-----xx---xxxxx---xx-xxxxxo--xx----xxxxxxx--x--x-x-xxx-----x--------x-x---x--xx--------xx-x-------x--x-xx-x---x-x--xx-x--------x--x-x-xxx--x-----xx-----xx-xo------x-x--x-x----x-------xxx-x-x----x--x-x-----xx----xxxx----x-xx--------x---xx--x--x--x--xx--xx---x--x---x-----x--x--xx---x-x-x--x-x-ox--x-xx-x---xx--xox-xo-----x-x---xx-x---x-x----x--x-x-----x--xxx----x-----ox--xx-x--x-xx-x-xx-x---xx--xxo-ox--xx-x-x---xx-x--x-x---xx-x-ox-xxxx--x-----------x-------x-xx-xxx---xxx--x-----------xx-x-----x-------x---x------x--xxxx-------x-x---x--x-xxx---o-xx----o------x--x---xx-x---xx-xxx-x-'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "table.slots()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 0.09786699342385574\n",
      "2 0.3816624750531391\n",
      "768 0.17267164120539713\n",
      "769 0.5484495883897172\n",
      "772 0.2314199080141074\n"
     ]
    }
   ],
   "source": [
    "# print some values\n",
    "for key in list(table)[:5]:\n",
    "    print(key, table[key])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# delete all the values\n",
    "for key in list(table):\n",
    "    del table[key]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0, 11)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(table), table._size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
